** This dofile is to clean and match baseline and endline data of grade 12 students 

// Step 1: Extract unique student id from "$endline/grade12_endline_4Apr2022"
* Unique student id comprises of responseid_* and schoolname_*
* Data can be saved as "$clean/grade12_match_id"

// Step 2: Clean baseline survey data and match with data from step 1
/* I have done some preliminary cleaning in "$dofiles/0.4.build_grade1112students_baseline.do" 
but please double-check that dofile by reading through each line.
If there is no error, you can use the clean data set saved in "$clean/grade1112_baseline_wgrade.dta" 
*/

// Step 3: Clean endline survey data and match with data from step 1 and 2
* (Some) code for cleaning is available in "$dofiles/2.2.build_grade12students_endline.do"

// Step 4: Clean test score data 

// Step 5: Merge data from step 3 with data from step 0 and step 4

// Step 6: Create variables for final analysis 

/* Step 7: Merge current database with follow-up data on final outcomes:
		- follow-up survey shared by Cheku 
		- TVET administrative data 
		- administrative data on repeaters
*/
* Output of this dofile is "$clean/grade12_analysis.dta"


* Start a fresh log for this run (closing any log left open by an earlier run)
capture log close
log using "$logfile/cleaning_grade12_$date.log", replace

*******************************************************************************
*************************** step 1
* Build the crosswalk of student identifiers from the 4 Apr 2022 endline file:
* keep the baseline/endline response ids and school names plus treatment info.
use "$endline/grade12_endline_4Apr2022.dta", clear
rename geoprisize strata
keep responseid_b schoolname_b responseid_e schoolname_e treatstudent treatschool strata
* uniqueid is the row number of the file in its loaded order
generate uniqueid = _n
label variable uniqueid "unique student ID created by researcher"
save "$clean/grade12_match_id.dta", replace // unique responseid_e 5460/5540

*******************************************************************************
*************************** step 2
* Run the baseline-cleaning do-file
do "$dofiles/0.4.build_grade1112students_baseline.do"





*******************************************************************************
*************************** step 3 

// 3.1 clean endline survey using 2.2
*global data "/Users/ye/Dropbox/ADB Bhutan TVET/Mentoring RCT2021fall/analysis/data"
* Load the raw endline survey export; the first row holds the variable names
import delimited "$raw/student/grade1112/20220404174151-SurveyExport.csv", varnames(1) encoding(UTF-8) clear 

// code from 2.2

* Discard respondents who declared that they are not Grade XII students
drop if thissurveyisintendedforgradexiis == "No, I am not a Grade XII student."

* Personal information: give the long auto-generated names short ones.
* The renames are wrapped in -capture- so that a missing source column does
* not stop the do-file at the rename itself.
capture rename whatisyourname name_original_e
label variable name_original_e "name reported at endline survey"
capture rename whatisyourgender sex
label values sex sex 
recode sex (2=0)
label define sex 1 "Male" 0 "Female"

* Citizenship id: strip punctuation, normalise case/spacing, remove "na",
* then blank out obvious placeholder values and anything not 11 characters long
capture rename whatisyourcitizenshipidcode11dig citizenid
format citizenid %15s
replace citizenid = subinstr(citizenid, ".", "", .)
replace citizenid = subinstr(citizenid, "]", "", .)
replace citizenid = subinstr(citizenid, "/", "", .)
replace citizenid = trim(itrim(lower(citizenid)))
replace citizenid = subinstr(citizenid, "na", "", .)
format citizenid %11s
replace citizenid = "" if inlist(citizenid, "00000000000","12345678910", "01234567890", "12345678902", "12345678901", "12345678912", "12345678911")
replace citizenid = "" if strlen(citizenid) != 11

* Remaining identifying / contact fields
capture rename inwhichdzongkhagisyourschoolloca district
capture rename dateofbirthdaywhatisyourdateofbi b_day
capture rename dateofbirthmonthwhatisyourdateof b_month
capture rename dateofbirthyearwhatisyourdateofb b_year
capture rename whatisyourstudentidcode studentid
capture rename whatisyouremailaddressprovidedby email
capture rename whatisyourprivateemailaddress email_private
capture rename whatisyourmobilephonenumber phone
capture rename whatisyouracademicstream stream
replace district = strlower(district)

* format name
* Lower-case the reported name and drop e-mail fragments typed into the name field
replace name_original_e = lower(name_original_e)
replace name_original_e = subinstr(name_original_e, "@gemailcom", "", .)   // e-mail fragment (misspelt)
replace name_original_e = subinstr(name_original_e, "@gmailcom", "", .)    // e-mail fragment

* name = compact matching key: the reported name without blanks or punctuation
generate name = name_original_e
replace name = trim(itrim(lower(name)))
replace name = subinstr(name, " ", "", .)          // blanks
replace name = subinstr(name, ",", "", .)          // comma
replace name = subinstr(name, "'", "", .)          // apostrophe
replace name = subinstr(name, ".", "", .)          // dot
replace name = subinstr(name, "/", "", .)          // slash
replace name = subinstr(name, "-", "", .)          // dash
replace name = subinstr(name, "=", "", .)          // equals sign
replace name = subinstr(name, "(", "", .)          // opening parenthesis
replace name = subinstr(name, ")", "", .)          // closing parenthesis
replace name = subinstr(name, "mynameis", "", .)   // "my name is" prefix (blanks already removed)
replace name = subinstr(name, "@gmailcom", "", .)  // e-mail fragment
replace name = subinstr(name, "@gemailcom", "", .) // e-mail fragment (misspelt)
format name %25s
format name_original_e %30s

* format studentid 
* Normalise case/spacing, collapse double dots, and strip (partial) e-mail
* domains that respondents appended to the id
replace studentid = trim(itrim(lower(studentid)))
replace studentid = subinstr(studentid, "..", ".", .)
replace studentid = subinstr(studentid, " ", "", .)
replace studentid = subinstr(studentid, "@education.gov.bt", "", .)   // full e-mail domain
replace studentid = subinstr(studentid, "@education.gov. t", "", .)
replace studentid = subinstr(studentid, "ation.gov.bt", "", .)
replace studentid = subinstr(studentid, "@educ", "", .)
replace studentid = subinstr(studentid, "@.educ", "", .)
replace studentid = subinstr(studentid, "ationgov.bt", "", .)
*br responseid studentid if strlen(studentid)~=17 & ~missing(studentid)
* Hand corrections: blank two specific responses and "na"; repair two malformed ids
replace studentid = "" if inlist(responseid, 4152, 4516) | studentid == "na"
replace studentid = "201.00342.14.0027" if studentid == "201.00342.14.0027."
replace studentid = "201.00272.11.0044" if studentid == "201.00272.11.0044``201.00272.11.0044"

* create a variable of school name
* schoolname is filled from the first non-missing of v33-v41 when the main
* school-name question is empty
rename whatisthenameofyourschool schoolname
tostring v40, replace
replace v40 = "" if v40 == "."
forvalues k = 33/41 {
	replace schoolname = v`k' if missing(schoolname) & !missing(v`k')
}
replace schoolname = strlower(schoolname)

* for partial submission: keep responses with sufficient information to determine students 
label variable status "survey status"
* noname flags a reported name that is missing or at most two characters long
tempvar noname
generate byte `noname' = missing(name_original_e) | strlen(name_original_e) <= 2
keep if status == "Complete" | ///
       (status == "Partial" & ///
	   (!(`noname' & missing(studentid)) | ///
	    !(`noname' & missing(sex) & missing(b_month) & missing(b_year) & missing(b_day))))
drop `noname'

* generate variables for treatment reports in the endline survey
generate treat_claim = (didyouparticipateinthementorship == 1)
label variable treat_claim "students claimed to have been participating in the program"

* questions answered by students reporting participation
rename howmanytimesdidyoumeetyourmentor treat_meet_person
rename v1272 treat_meet_online
rename overallsatisfactionwiththementor treat_satisfaction_mentor 
rename overallsatisfactionwiththeconten treat_satisfaction_program 
rename howlikelythatyouwillstayincontac treat_contact

* untreated_* questions; answer 2 is recoded to 0
rename doyouknowthatsomestudentsinyours untreated_tschool
recode untreated_tschool (2=0)
rename haveyoutalkedwithanyofyourclassm untreated_talk
rename ifthementoringhadbeenavailableto untreated_interest

* control_* questions; answer 2 is recoded to 0
rename didyouhearaboutthestudentmentori control_cschool
recode control_cschool (2=0)
rename haveyoutalkedwithanyonewhoreceiv control_talk
rename v1283 control_interest

recode untreated_interest control_interest (2=0)

* endline phone number: lower-case and strip "na"
replace phone = strlower(phone) 
replace phone = subinstr(phone, "na", "", .)
rename phone phone_e



// Check for duplicated and invalid studentid 
duplicates tag studentid, generate(dup_studentid)
sort studentid status responseid

* duplicated answers: within the sort order above, drop a row that repeats the
* identifying information of the row directly before it (three passes)
forvalues pass = 1/3 {
	drop if name == name[_n-1] & studentid == studentid[_n-1] & citizenid == citizenid[_n-1] & dup_studentid != 0
	drop if name == name[_n-1] & b_day == b_day[_n-1] & b_month == b_month[_n-1] & b_year == b_year[_n-1] & sex == sex[_n-1] & stream == stream[_n-1] & dup_studentid != 0
}

* re-tag duplicates after the drops
drop dup_studentid
duplicates tag studentid, generate(dup_studentid)

* for invalid studentID and citizenID: attempt to recover studentid later on
sort studentid responseid
rename responseid responseid_e
rename status status_e

* drop placeholder ids, ids that are not 17 characters long, and remaining
* duplicated ids that have no citizenship id
drop if inlist(studentid, "000.00000.00.0000", "111.22222.33.4444") | strlen(studentid) != 17 | (missing(citizenid) & dup_studentid != 0)
  
  
  
// 3.2 clean endline survey: rename
* Every rename/label below is wrapped in -capture- so that a column missing
* from the export does not stop the do-file.
* -replace- makes this section re-runnable, like the other label definitions in this file.
lab define categories 1 "strongly disagree" 2 "disagree" 3 "neither disagree nor agree" 4 "agree" 5 "strongly agree" 99 "decline to answer", replace

* Perception about education and employment
cap ren scienceandtechnologyaremakingour b2a_e
cap lab var b2a_e "perception: science makes lives healthier, easier, comfortable"
cap ren allthingsconsideredscienceandtec b2d_e
cap lab var b2d_e "perception: science makes the world better off"
cap lab val b2* categories

* How well do you think you have been prepared for the final exam?
cap ren mathhowwelldoyouthinkyouhavebeen prep_math_e
cap ren dzongkhahowwelldoyouthinkyouhave prep_lang_e
cap ren englishhowwelldoyouthinkyouhaveb prep_eng_e
cap ren overallhowwelldoyouthinkyouhaveb prep_all_e

* Education plans
cap ren iftherewerenobarriershowfarinsch c7_e
cap lab var c7_e "education plan if no barriers"
cap ren asthingsstandnowhowfarinschooldo c8_e
cap lab var c8_e "education plan as things stand now"

* Preference about employment 
cap ren incomeforyourfutureeducationalan d1a_e
cap lab var d1a_e "importance to educareer: income"
cap ren prestigeandsocialstatusforyourfu d1c_e
cap lab var d1c_e "importance to educareer: prestige"
cap ren mygenuineinterestforyourfutureed d1d_e
cap lab var d1d_e "importance to educareer: genuine interest"
cap ren advicefromparentsorguardiansfory d1f_e
cap lab var d1f_e "importance to educareer: parents advice"
cap ren whatmyfriendsthinkandchooseforyo d1g_e
cap lab var d1g_e "importance to educareer: friends choice"
cap ren goodbalancebetweenprofessionalan d1h_e
cap lab var d1h_e "importance to educareer: worklife balance"
cap ren costsofeducationforyourfutureedu d1i_e
cap lab var d1i_e "importance to educareer: costedu"

* Ranking of preferred sectors (labels previously misspelt "perferred")
cap ren agricultureforestryandfishingwhi d2a_e
cap lab var d2a_e "ranking preferred sector: agriculture"
cap ren miningwhichsectorswouldyouprefer d2b_e
cap lab var d2b_e "ranking preferred sector: mining"
cap ren manufacturingwhichsectorswouldyo d2c_e
cap lab var d2c_e "ranking preferred sector: manufacturing"
cap ren electricitygaswatersupplywastema d2d_e
cap lab var d2d_e "ranking preferred sector: public utilities"
cap ren constructionwhichsectorswouldyou d2e_e
cap lab var d2e_e "ranking preferred sector: construction"
cap ren retailwholesaletradingwhichsecto d2f_e
cap lab var d2f_e "ranking preferred sector: trade"
cap ren hotelrestaurantfoodserviceswhich d2g_e
cap lab var d2g_e "ranking preferred sector: hotel, food"
cap ren transportationwhichsectorswouldy d2h_e
cap lab var d2h_e "ranking preferred sector: transportation"
cap ren informationandcommunicationwhich d2i_e
cap lab var d2i_e "ranking preferred sector: ict"
cap ren financialandinsuranceactivitiesw d2j_e
cap lab var d2j_e "ranking preferred sector: finance"
cap ren educationwhichsectorswouldyoupre d2k_e
cap lab var d2k_e "ranking preferred sector: education"
cap ren healthwhichsectorswouldyouprefer d2l_e
cap lab var d2l_e "ranking preferred sector: health"
cap ren entertainmentartsrecreationwhich d2m_e
cap lab var d2m_e "ranking preferred sector: entertainment"
cap ren professionalscientificandtechnic d2n_e
cap lab var d2n_e "ranking preferred sector: science"

* Ranking of preferred employers
cap ren governmentwhichofthefollowingemp d3a_e
cap lab var d3a_e "ranking preferred employer: government"
cap ren governmentownedcorporationwhicho d3b_e
cap lab var d3b_e "ranking preferred employer: soe"
cap ren privatecompanywhichofthefollowin d3c_e
cap lab var d3c_e "ranking preferred employer: private"
cap ren runningnonfarmingbusinesswhichof d3d_e
cap lab var d3d_e "ranking preferred employer: own nonfarm"
cap ren farmingwhichofthefollowingemploy d3e_e
cap lab var d3e_e "ranking preferred employer: own farm"

* Perceptions about occupations and blue-collar jobs
cap ren iknowexactlywhichoccupationiwant d4a_e
cap lab var d4a_e "perception: know exactly which occupation to work"
cap ren bluecollarjobsarepoorlyperceived d4c_e
cap lab var d4c_e "perception: blue-collar jobs are poorly perceived in society"
cap ren bluecollarjobsareinferiortowhite d4d_e
cap lab var d4d_e "perception: blue-collar jobs are inferior to white collars"
cap ren evenwithgoodpayiwouldnttakebluec d4e_e
cap lab var d4e_e "perception: even with good pay not take blue-collar jobs"
cap ren bluecollarjobsareformenrathertha d4g_e
cap lab var d4g_e "perception: blue-collar jobs are for men than women"
cap lab val d4* categories


* Knowledge about university education and TVET education 
* (a = university, b = TVET; renames/labels are -capture-d so a missing column
*  does not stop the do-file)
cap ren universityeducationhowmuchdoyoua f1a_e
cap lab var f1a_e "knowledge: admission criteria to university"
cap ren tveteducationbytechnicaltraining f1b_e
cap lab var f1b_e "knowledge: admission criteria to tvet"

cap ren v104 f2a_e
cap lab var f2a_e "knowledge: application to university"
cap ren v105 f2b_e
cap lab var f2b_e "knowledge: application to tvet"

cap ren v106 f3a_e
cap lab var f3a_e "knowledge: curriculum of university"
cap ren v107 f3b_e
cap lab var f3b_e "knowledge: curriculum of tvet"

cap ren v108 f4a_e
cap lab var f4a_e "knowledge: student life at university"
cap ren v109 f4b_e
cap lab var f4b_e "knowledge: student life at tvet"

cap ren v110 f5a_e
cap lab var f5a_e "knowledge: tuition scholarship university"
cap ren v111 f5b_e
cap lab var f5b_e "knowledge: tuition scholarship tvet"

cap ren v112 f6a_e
cap lab var f6a_e "knowledge: graduation rate of university"
cap ren v113 f6b_e
cap lab var f6b_e "knowledge: graduation rate of tvet"

cap ren v114 f7a_e
cap lab var f7a_e "knowledge: future career of university"
cap ren v115 f7b_e
cap lab var f7b_e "knowledge: future career of tvet"

cap ren v116 f8a_e
cap lab var f8a_e "knowledge: overall university"
cap ren v117 f8b_e
cap lab var f8b_e "knowledge: overall tvet"

* NOTE(review): f* matches every variable whose name starts with "f" at this
* point, not only f1-f8; if any of them is a string the captured command
* fails as a whole -- confirm the labels are actually attached.
cap lab val f* categories


* which is correct: Government higher secondary schools (no endline survey index)
* The three labels below were written as -lab <varname> "..."-, i.e. without
* the -var- subcommand; the error was hidden by -capture- and no label was
* ever applied. They now use -lab var-.
cap ren whichofthefollowingstatementsisc ttiizc_criteria
cap lab var ttiizc_criteria "to be eligible for for training programs in TTI or IZC"
cap ren v119 registration_form
cap lab var registration_form "one registration form or one for each"
cap ren whenstudentsareeligibleforthetra interview_criteria
* Stata variable labels hold at most 80 characters, so the original 81-character text
* ("which TTIs/IZC will shortlist the students for an interview for eligible students")
* is shortened here.
cap lab var interview_criteria "which TTIs/IZC will shortlist eligible students for an interview"


* Degree ranking by genuine interest 
* Each ranking variable is renamed, labelled and then converted to string
* (all steps -capture-d so a missing column does not stop the do-file).
cap ren bachelorssciencemathcomputerscie f9a_e
cap lab var f9a_e "ranking: SCI"
cap tostring f9a_e, replace 
cap ren bachelorsengineeringpleaserankth f9b_e
cap lab var f9b_e "ranking: ENG"
cap tostring f9b_e, replace 
cap ren bachelorsmedicinehealthandnursin f9c_e
cap lab var f9c_e "ranking: MED"
cap tostring f9c_e, replace 
cap ren bachelorseducationpleaserankthef f9d_e
cap lab var f9d_e "ranking: EDU"
cap tostring f9d_e, replace 
cap ren bachelorsbusinessmanagementecono f9e_e
cap lab var f9e_e "ranking: BUS"
cap tostring f9e_e, replace 
cap ren bachelorsartshumanitiesandsocial f9f_e
cap lab var f9f_e "ranking: ARTS"
cap tostring f9f_e, replace 
cap ren technicaldegreesnationalcertific f9g_e
cap lab var f9g_e "ranking: TTI"
cap tostring f9g_e, replace 
cap ren stopeducationafterhighschoolplea f9h_e
cap lab var f9h_e "ranking: HighSchool"
cap tostring f9h_e, replace 

* Based on academic performance, how difficult to enroll/complete  (difficult to tell which is which for tvet, in the sequence of appearence of variables)
cap ren bachelorsdegreesbasedonyouracade f10a_e
cap lab var f10a_e "how difficult to enroll in university"
cap ren v130 f10b_e
cap lab var f10b_e "how difficult to enroll in tvet"

cap ren bachelorsdegreeshoweasyordifficu f11a_e
cap lab var f11a_e "how difficult to complete university"
cap ren v132 f11b_e
cap lab var f11b_e "how difficult to complete tvet"

cap ren bachelorsdegreeshowmuchdoyouthin f12a_e
cap lab var f12a_e "how much to enjoy curriculum of university"
cap ren v134 f12b_e
cap lab var f12b_e "how much to enjoy curriculum of tvet"

cap ren v135 f13a_e
cap lab var f13a_e "how much to enjoy student life of university"
cap ren v136 f13b_e
cap lab var f13b_e "how much to enjoy student life of tvet"

* Decision today 
cap ren v137 f14a_e 
* label the renamed variable f14a_e (the original referred to f14a, which only
* resolved through variable-name abbreviation, if at all)
cap lab var f14a_e "decision ranking: SCI"
cap ren bachelorsengineeringifyoutodayde f14b_e
cap lab var f14b_e "decision ranking: ENG"
cap ren v139 f14c_e
cap lab var f14c_e "decision ranking: MED"
cap ren bachelorseducationifyoutodaydeci f14d_e
cap lab var f14d_e "decision ranking: EDU"
cap ren v141 f14e_e
cap lab var f14e_e "decision ranking: BUS"
cap ren v142 f14f_e
cap lab var f14f_e "decision ranking: ARTS"
cap ren v143 f14g_e
cap lab var f14g_e "decision ranking: TTI"
cap ren stopeducationafterhighschoolifyo f14h_e
cap lab var f14h_e "decision ranking: HS"


* Preference about STEM-related education
* (renames/labels are -capture-d so a missing column does not stop the do-file)
capture rename mathhowmuchdoyoulikeeachofthefol c2a_e
capture label variable c2a_e "liking: math"
capture rename dzongkhahowmuchdoyoulikeeachofth c2b_e
capture label variable c2b_e "liking: dzongkha"
capture rename englishhowmuchdoyoulikeeachofthe c2c_e
capture label variable c2c_e "liking: english"
capture rename physicshowmuchdoyoulikeeachofthe c2d_e
capture label variable c2d_e "liking: physics"
capture rename chemistryhowmuchdoyoulikeeachoft c2e_e
capture label variable c2e_e "liking: chemistry"
capture rename biologyhowmuchdoyoulikeeachofthe c2f_e
capture label variable c2f_e "liking: biology"
capture rename historyandcivicshowmuchdoyoulike c2g_e
capture label variable c2g_e "liking: history"
capture rename geographyhowmuchdoyoulikeeachoft c2h_e
capture label variable c2h_e "liking: geography"
capture rename overallhowmuchdoyoulikeeachofthe c2i_e
capture label variable c2i_e "liking: overall"


* Sure about f14?
capture rename howsureareyouaboutyourchoicesand f15_e
capture label variable f15_e "How sure about f14"

*  Beliefs about % of other students choice
* male students (suffix _m)
capture rename v146 f16a_m_e
capture label variable f16a_m_e "belief: % male students choosing SCI"
capture rename bachelorsengineeringwhatpercenta f16b_m_e
capture label variable f16b_m_e "belief: % male students choosing ENG"
capture rename v148 f16c_m_e
capture label variable f16c_m_e "belief: % male students choosing MED"
capture rename bachelorseducationwhatpercentage f16d_m_e
capture label variable f16d_m_e "belief: % male students choosing EDU"
capture rename v150 f16e_m_e
capture label variable f16e_m_e "belief: % male students choosing BUS"
capture rename v151 f16f_m_e
capture label variable f16f_m_e "belief: % male students choosing ARTS"
capture rename v152 f16g_m_e
capture label variable f16g_m_e "belief: % male students choosing TTI"
capture rename stopeducationafterhighschoolwhat f16h_m_e
capture label variable f16h_m_e "belief: % male students choosing HS"

* female students (suffix _f)
capture rename v154 f16a_f_e
capture label variable f16a_f_e "belief: % female students choosing SCI"
capture rename v155 f16b_f_e
capture label variable f16b_f_e "belief: % female students choosing ENG"
capture rename v156 f16c_f_e
capture label variable f16c_f_e "belief: % female students choosing MED"
capture rename v157 f16d_f_e
capture label variable f16d_f_e "belief: % female students choosing EDU"
capture rename v158 f16e_f_e
capture label variable f16e_f_e "belief: % female students choosing BUS"
capture rename v159 f16f_f_e
capture label variable f16f_f_e "belief: % female students choosing ARTS"
capture rename v160 f16g_f_e
capture label variable f16g_f_e "belief: % female students choosing TTI"
capture rename v161 f16h_f_e
capture label variable f16h_f_e "belief: % female students choosing HS"

* TVET courses 
capture rename animationifyouenrollinatvetinsti f17a_e
capture label variable f17a_e "how interesting if enroll TVET: animation"
capture rename automobilemechanicifyouenrollina f17b_e
capture label variable f17b_e "how interesting if enroll TVET: automobile"
capture rename computerhardwareandnetworkingify f17c_e 
capture label variable f17c_e "how interesting if enroll TVET: computer"
capture rename electricalifyouenrollinatvetinst f17d_e
capture label variable f17d_e "how interesting if enroll TVET: electrics"
capture rename furnituremakingifyouenrollinatve f17e_e
capture label variable f17e_e "how interesting if enroll TVET: furniture"
capture rename heavyearthmoverifyouenrollinatve f17f_e
capture label variable f17f_e "how interesting if enroll TVET: heavy earth mover"
capture rename heavyvehicledrivingifyouenrollin f17g_e
capture label variable f17g_e "how interesting if enroll TVET: heavy vehicle drive"
capture rename jimzoifyouenrollinatvetinstituti f17h_e
capture label variable f17h_e "how interesting if enroll TVET: jimzo"
capture rename lhadriifyouenrollinatvetinstitut f17i_e
capture label variable f17i_e "how interesting if enroll TVET: lhadri"
capture rename masonryifyouenrollinatvetinstitu f17j_e
capture label variable f17j_e "how interesting if enroll TVET: masonry"
capture rename mechanicalfitterifyouenrollinatv f17k_e
capture label variable f17k_e "how interesting if enroll TVET: mechanics"
capture rename panelbeatingdentingandpantingify f17l_e
capture label variable f17l_e "how interesting if enroll TVET: panelbeating"
capture rename patrawoodcarvingifyouenrollinatv f17m_e
capture label variable f17m_e "how interesting if enroll TVET: patra"
capture rename plumbingifyouenrollinatvetinstit f17n_e
capture label variable f17n_e "how interesting if enroll TVET: plumbing"
capture rename repairmaintenanceandcommissionin f17o_e
capture label variable f17o_e "how interesting if enroll TVET: transformer"
capture rename hotelortourismmanagementifyouenr f17p_e
capture label variable f17p_e "how interesting if enroll TVET: hotel, tourism"
capture rename foodproductionifyouenrollinatvet f17q_e
capture label variable f17q_e "how interesting if enroll TVET: food prod"
capture rename tailoringifyouenrollinatvetinsti f17r_e
capture label variable f17r_e "how interesting if enroll TVET: tailoring"

* Self beliefs about population and own labor outcomes
* The _m / _f versions of each question previously carried identical labels,
* so they could not be told apart in output; labels now state (men) / (women).
* (renames/labels are -capture-d so a missing column does not stop the do-file)
cap ren bachelorsdegreesamongmenwhoareab g1a_m_e
cap lab var g1a_m_e "belief: pop employment with univ (men)"
cap ren v1241 g1b_m_e
cap lab var g1b_m_e "belief: pop employment with TTI (men)"
cap ren stopeducationafterhighschoolamon g1c_m_e
cap lab var g1c_m_e "belief: pop employment with HS (men)"

cap ren bachelorsdegreesamongthefulltime g2a_m_e
cap lab var g2a_m_e "belief: pop earnings with univ (men)"
cap ren technicaltraininginstitutettiori g2b_m_e
cap lab var g2b_m_e "belief: pop earnings with TTI (men)"
cap ren v1245 g2c_m_e
cap lab var g2c_m_e "belief: pop earnings with HS (men)"

cap ren bachelorsdegreesamongwomenwhoare g1a_f_e
cap lab var g1a_f_e "belief: pop employment with univ (women)"
cap ren v1247 g1b_f_e
cap lab var g1b_f_e "belief: pop employment with TTI (women)"
cap ren v1248 g1c_f_e
cap lab var g1c_f_e "belief: pop employment with HS (women)"

cap ren v1249 g2a_f_e
cap lab var g2a_f_e "belief: pop earnings with univ (women)"
cap ren v1250 g2b_f_e
cap lab var g2b_f_e "belief: pop earnings with TTI (women)"
cap ren v1251 g2c_f_e
cap lab var g2c_f_e "belief: pop earnings with HS (women)"

cap ren bachelorsdegreescomparedtoallstu g3a_e
cap lab var g3a_e "belief: relative academic perf choosing univ"
cap ren v1253 g3b_e
cap lab var g3b_e "belief: relative academic perf choosing TTI"
cap ren stopeducationafterhighschoolcomp g3c_e
cap lab var g3c_e "belief: relative academic perf choosing HS"

* Own labor market outcomes
cap ren bachelorsdegreeswhatdoyoubelieve g4a_e
cap lab var g4a_e "belief: own employment with univ"
cap ren v1256 g4b_e
cap lab var g4b_e "belief: own employment with TTI"
cap ren v1257 g4c_e
cap lab var g4c_e "belief: own employment with HS"

cap ren bachelorsdegreesimaginethatyouha g5a_e
cap lab var g5a_e "belief: own earnings with univ"
cap ren v1259 g5b_e
cap lab var g5b_e "belief: own earnings with TTI"
cap ren stopeducationafterhighschoolimag g5c_e
cap lab var g5c_e "belief: own earnings with HS"

* Approval 
cap ren bachelorsdegreesimaginethatyouch g7a_e
cap ren v1262 g7b_e
cap ren v1263 g7c_e
cap lab var g7a_e "Approval of parents if choosing univ"
cap lab var g7b_e "Approval of parents if choosing TTI"
cap lab var g7c_e "Approval of parents if choosing HS"

cap ren v1264 g8a_e
cap ren v1265 g8b_e
cap ren v1266 g8c_e
cap lab var g8a_e "Approval of friends if choosing univ"
cap lab var g8b_e "Approval of friends if choosing TTI"
cap lab var g8c_e "Approval of friends if choosing HS"

cap ren v1267 g9a_e
cap ren v1268 g9b_e
cap ren v1269 g9c_e
cap lab var g9a_e "Approval of society if choosing univ"
cap lab var g9b_e "Approval of society if choosing TTI"
cap lab var g9c_e "Approval of society if choosing HS"



//labeling
* Rename the two remaining mentor questions, drop helper / confirmation
* columns, attach value labels, and save the cleaned endline file.

rename howwouldyoudescribeyourrelations treat_relation_mentor
rename inadditiontoofficialmeetingshowo treat_communication_freq
drop thisistheendofthesurveyifyouwant
drop v33-v41
drop a0_1 thissurveyisintendedforgradexiis a0_2 a0_3 reenteryourcitizenshipidcode11di reenteryourstudentidcode reenteryourprivateemailaddress reenteryourmobilephonenumber


* Value labels; -lab val- is -capture-d so that a pattern matching no
* (or a string) variable does not stop the do-file.
* (label text previously misspelt "sligtly important")
lab define importance 1 "not at all important" 2 "slightly important" 3 "somewhat important" 4 "very important" 5 "extremely important" 99 "don't know", replace
cap lab val d1* importance

lab define difficulty 1 "very difficult" 2 "quite difficult" 3 "moderate" 4 "quite easy" 5 "very easy" 99 "don't know/decline to answer", replace
cap lab val f10* f11* difficulty

lab define howwell 1 "not at all" 2 "slightly" 3 "moderate" 4 "very much" 5 "extremely" 99 "NA", replace
cap lab val prep_* howwell
cap lab val f12* f13* howwell
cap lab val c2* howwell


lab define howsure 1 "not at all sure" 2 "slightly sure" 3 "somewhat sure" 4 "moderately sure" 5 "extremely sure" 99 "decline to answer", replace
cap lab val f15_e howsure

lab define interesting 1 "not interesting" 2 "slightly interesting" 3 "moderately interesting" 4 "interesting" 5 "very interesting" 99 "don't know", replace
cap lab val f17* interesting

lab define performance 1 "very bad" 2 "bad" 3 "moderate" 4 "good" 5 "very good" 99 "don't know/decline to answer", replace
cap lab val g3* performance 

lab define relation 1 "very poor" 2 "poor" 3 "fair" 4 "good" 5 "very good", replace
cap lab val treat_relation_mentor relation

lab define rateexper 1 "very dissatisfied" 2 "dissatisfied" 3 "neutral" 4 "satisfied" 5 "very satisfied", replace
cap lab val treat_satisfaction_mentor treat_satisfaction_program rateexper

lab define freq 1 "often" 2 "sometimes" 3 "rarely" 4 "never", replace
cap lab val control_talk untreated_talk freq

save "$clean/grade12_endline.dta", replace


***********************************************************
// 3.3 merge with baseline and endline survey separately and combinedly
* Path macros: this do-file writes grade12_match_id.dta and grade12_endline.dta
* to "$clean", and the header documents the baseline file under "$clean" too,
* so they are read back from "$clean" here (previously "$data/clean", which
* only works if $data is defined and $clean == "$data/clean"). The two
* intermediate files are likewise kept under "$temp", the folder used for
* grade12_analysis.dta, instead of "$data/temp".

* match baseline unique id
use "$clean/grade12_match_id.dta", clear
cap rename responseid_b responseid 
cap rename schoolname_b schoolname
merge 1:m responseid schoolname using "$clean/grade1112_baseline_wgrade.dta"
keep if _merge==3  // 16,342 not matched, 5,540 matched
drop _merge 
* suffix every baseline-side variable with _bl, except the merge key uniqueid
rename * *_bl
rename uniqueid_bl uniqueid
save "$temp/grade12_baseline_merged_unique.dta", replace


* match endline unique id
use "$clean/grade12_endline.dta", clear 
* strip the _e suffix from all endline variables, then restore it on the two
* variables used as merge keys
rename *_e *
ren schoolname schoolname_e 
ren responseid responseid_e 
merge 1:m responseid_e schoolname_e using "$clean/grade12_match_id.dta"
keep if _merge==3  // 1294 not matched, 5433 matched
drop _merge 
cap rename studentid studentid_e
save "$temp/grade12_endline_merged_unique.dta", replace

* build data set for analysis 
use "$temp/grade12_baseline_merged_unique.dta", clear 
rename b_month_bl b_month_b
merge 1:1 uniqueid using "$temp/grade12_endline_merged_unique.dta"
drop _merge 

save "$temp/grade12_analysis.dta", replace

* Add mentor/mentee identifiers from the randomization main list to the
* analysis file, matching on the baseline student id
import excel "$randomization/final/randomization_TVET_mainlist_9Oct2021.xlsx", sheet("Sheet1") firstrow clear
keep mentorid menteeid studentid mentor_name mentor_email mentor_phone
capture rename studentid studentid_bl
merge 1:1 studentid_bl using "$temp/grade12_analysis.dta", nogenerate

save "$temp/grade12_analysis.dta", replace 

* obtain mentorid 
* NOTE(review): the same sheet is imported a second time here, although the
* file saved just above already carries these columns -- confirm whether the
* first merge is still needed.
import excel "$randomization/final/randomization_TVET_mainlist_9Oct2021.xlsx", sheet("Sheet1") firstrow clear
keep mentorid menteeid studentid mentor_name mentor_email mentor_phone
rename studentid studentid_bl

* merge with mentor dataset 
merge m:1 mentorid using "$clean/TVET_mentor.dta", keep(matched) nogenerate  // need update from TVET_mentor, rerun the code
merge 1:1 studentid_bl using "$temp/grade12_analysis.dta", nogenerate

* place the mentor variables right after dup_studentid, then drop that helper
order mentor*, after(dup_studentid)
drop dup_studentid

* date of birth as a YYYYMMDD string built from the baseline day/month/year
generate double dateofbirth = b_year_bl*10^4 + b_month_b*10^2 + b_day_bl
format dateofbirth %15.0f
tostring dateofbirth, replace 
label variable dateofbirth "date of birth YYYYMMDD"

save "$temp/tvet_analysis.dta", replace


********************************************************************************
*
*		CLEAN TEST SCORE DATASET 
*
********************************************************************************
* Run the test-score cleaning do-file
do "$dofiles/4.2.4.testscore_grade12.do"

********************************************************************************
*
*		PROCESSING DATA FOR ANALYSIS
*
********************************************************************************
use "$temp/tvet_analysis.dta", clear

* merge with score data 
* missing_score = 1 for observations found only in the analysis file
merge 1:1 uniqueid using "$clean/exam_score12" 
generate missing_score = (_merge == 1)
label variable missing_score "missing score data in BHSEC exam"
drop _merge

// programming standardization with missing values 
cap program drop standardization 
program standardization, rclass 
	* Purpose : z-score a variable against the pure-control group.
	* Usage   : standardization varname
	* Effects : - recodes 99 to missing IN PLACE in `varname'
	*           - (re)creates z_`varname' = (varname - mean)/sd, where mean
	*             and sd are taken over observations with CONTROL == 1
	* Requires: a variable named CONTROL in the data in memory.
	* Returns : r(N), r(mean), r(sd) of the control-group summary.
	args i
	cap drop z_`i' 
	recode `i' (99=.)
	* mean and sd only -- the percentiles from -summarize, detail- are not used
	su `i' if CONTROL == 1
	* keep the moments in scalars: full double precision, and they survive
	* the commands below so they can be returned to the caller
	tempname n mu sigma
	scalar `n' = r(N)
	scalar `mu' = r(mean)
	scalar `sigma' = r(sd)
	if missing(`sigma') | `sigma' == 0 {
		di as text "note: control-group SD of `i' is zero or missing; z_`i' will be all missing"
	}
	g z_`i' = (`i' - `mu')/`sigma'
	return scalar N = `n'
	return scalar mean = `mu'
	return scalar sd = `sigma'
end

// programming standardization WITHOUT recoding 99 to missing
cap program drop standardization_nomissing 
program standardization_nomissing, rclass 
	* Purpose : z-score a variable against the pure-control group, leaving
	*           the source variable untouched (99 is treated as a real value).
	* Usage   : standardization_nomissing varname
	* Effects : (re)creates z_`varname' = (varname - mean)/sd, where mean and
	*           sd are taken over observations with CONTROL == 1
	* Requires: a variable named CONTROL in the data in memory.
	* Returns : r(N), r(mean), r(sd) of the control-group summary.
	args i
	cap drop z_`i' 
	* mean and sd only -- the percentiles from -summarize, detail- are not used
	su `i' if CONTROL == 1
	* keep the moments in scalars: full double precision, and they survive
	* the commands below so they can be returned to the caller
	tempname n mu sigma
	scalar `n' = r(N)
	scalar `mu' = r(mean)
	scalar `sigma' = r(sd)
	if missing(`sigma') | `sigma' == 0 {
		di as text "note: control-group SD of `i' is zero or missing; z_`i' will be all missing"
	}
	g z_`i' = (`i' - `mu')/`sigma'
	return scalar N = `n'
	return scalar mean = `mu'
	return scalar sd = `sigma'
end

// Create relevant variables for analysis: Others 
* Attrition & treatment status 
* attrition = 1 when the endline survey status is missing
generate attrition = missing(status)
label variable attrition "indicator for attrited student in first follow-up survey"

* Three mutually exclusive arms built from the baseline-side assignment variables
generate TREAT = (treatstudent_bl == 1) // treated students 
label variable TREAT "indicator for treatment student"
generate SPILL = (treatstudent_bl == 0 & treatschool_bl == 1) // spillover students 
label variable SPILL "indicator for spillover student"
generate CONTROL = (treatstudent_bl == 0 & treatschool_bl == 0)
label variable CONTROL "indicator for pure control student"

* smallsample = 1 for treated or spillover students
generate smallsample = (TREAT != 0) | (SPILL != 0) // only treated school 
label variable smallsample "indicator for treatment schools"

* School variable  
egen school = group(schoolname_bl)
label variable school "school variable"

* Socioeconomic status: age, gender, parents having at least high school education, wealth 
* NOTE(review): the stream dummies are named *_bl but are built from -stream-
* (no _bl suffix) -- confirm which survey round is intended.
g i_stream_art_bl = stream==1
g i_stream_com_bl = stream==2
g i_stream_sci_bl = stream==3
lab var i_stream_art_bl "Student in Arts stream"
lab var i_stream_com_bl "Student in Commerce stream"
lab var i_stream_sci_bl "Student in Science stream"

g age_bl = 2021 - b_year_bl
lab var age_bl "student age"

* NOTE(review): male is 0 (not missing) when sex_bl is missing -- confirm intended.
g male = sex_bl==1 
lab var male "indicator for being male student"

* Parents' education: 1 if x3_bl / x7_bl >= 6, missing if the answer is 99 or
* missing. Stata treats missing values as larger than any number, so the
* previous -g father_edu = x3_bl>=6- coded a MISSING answer as 1; the -if-
* qualifier leaves those observations missing instead.
g father_edu = x3_bl>=6 if ~missing(x3_bl) & x3_bl~=99
g mother_edu = x7_bl>=6 if ~missing(x7_bl) & x7_bl~=99

lab var father_edu "father completed high school and beyond"
lab var mother_edu "mother completed high school and beyond"

* Wealth index: first principal component of the x11*_bl items, standardized
* against the control group
recode x11*_bl (2=0) // 2 means no, 1 means yes
recode x11*_bl (99=.)
pca x11*_bl
predict pc1, score
rename pc1 wealth_bl 
standardization wealth_bl 
lab var z_wealth_bl "wealth index (PCA)"

* Education-related characteristics: tvet course taken (c3), parents mentoring stream choice (c5), self-reported academic performance (c1)

/* TVET course taken (c3), how interesting if enroll TVET by subjects (f17): 
Trinh: these are not particularly informative */

* NOTE(review): the heading above refers to c3 while the indicator is built from c2_bl -- confirm.
generate tvetcourse_bl = (c2_bl == 1)
label variable tvetcourse_bl "1/0 have taken any optional TVET course"

* copies of the c5 items on who the student talked with about stream choice
generate mt_parents = c5c1_bl
label variable mt_parents "talk with parents about stream choice"
generate mt_siblings = c5c2_bl
label variable mt_siblings "talk with siblings about stream choice"
generate mt_friends = c5c4_bl
label variable mt_friends "talk with friends about stream choice"
generate mt_teachers = c5c6_bl
label variable mt_teachers "talk with teachers about stream choice"

* academic performance should use only dzongkha & english: the two required courses 
* other courses are elective (depend on academic stream) so not very informative 
* x*(x!=99)/(x!=99) reproduces x, except that 99 becomes missing (0/0)
local perf_items a b c k
local perf_names math lang eng all
forvalues j = 1/4 {
	local it : word `j' of `perf_items'
	local nm : word `j' of `perf_names'
	generate perf_`nm'_bl = c1`it'_bl*(c1`it'_bl != 99)/(c1`it'_bl != 99)
}

* z-score each performance measure against the control group
foreach nm of local perf_names {
	standardization perf_`nm'_bl
}

label variable z_perf_math_bl "Subjective performance index: math"
label variable z_perf_lang_bl "Subjective performance index: dzongkha"
label variable z_perf_eng_bl "Subjective performance index: english"
label variable z_perf_all_bl "Subjective performance index: overall"

* Personality (zscore)
* Each trait is the mean of three p1 items over the items not answered 99;
* one item per trait (except openness) enters as (5 - item).
* NOTE(review): if the items are on a 1-5 scale, reverse coding would
* normally be 6 - item -- confirm the response scale.
generate big5extro_bl = ///
	((5 - p1a_bl)*(p1a_bl != 99) + p1c_bl*(p1c_bl != 99) + p1i_bl*(p1i_bl != 99)) / ///
	((p1a_bl != 99) + (p1c_bl != 99) + (p1i_bl != 99))
generate big5agree_bl = ///
	((5 - p1l_bl)*(p1l_bl != 99) + p1m_bl*(p1m_bl != 99) + p1n_bl*(p1n_bl != 99)) / ///
	((p1l_bl != 99) + (p1m_bl != 99) + (p1n_bl != 99))
generate big5consci_bl = ///
	(p1d_bl*(p1d_bl != 99) + (5 - p1b_bl)*(p1b_bl != 99) + p1o_bl*(p1o_bl != 99)) / ///
	((p1d_bl != 99) + (p1b_bl != 99) + (p1o_bl != 99))
generate big5neuro_bl = ///
	(p1e_bl*(p1e_bl != 99) + p1g_bl*(p1g_bl != 99) + (5 - p1h_bl)*(p1h_bl != 99)) / ///
	((p1e_bl != 99) + (p1g_bl != 99) + (p1h_bl != 99))
generate big5open_bl = ///
	(p1j_bl*(p1j_bl != 99) + p1k_bl*(p1k_bl != 99) + p1f_bl*(p1f_bl != 99)) / ///
	((p1j_bl != 99) + (p1k_bl != 99) + (p1f_bl != 99))

* keep only the control-group z-scores of the five traits
foreach trait in big5extro big5agree big5consci big5neuro big5open {
	standardization `trait'_bl
	drop `trait'_bl
}
label variable z_big5extro_bl "Big 5 extroversion index"
label variable z_big5agree_bl "Big 5 agreeness index"
label variable z_big5consci_bl "Big 5 conscientiousness index"
label variable z_big5neuro_bl "Big 5 neuroticism index"
label variable z_big5open_bl "Big 5 openness index"

// Career-related perception: importance to educareer (d1)
* One variable per d1 item; x/(x!=99) turns a 99 response into missing (division by 0).
local impt_names income prestige interest parents friends balance cost
local impt_items d1a    d1c      d1d      d1f     d1g     d1h     d1i
local impt_desc `" "income" "prestige" "interest" "parents" "friends" "worklife balance" "cost of edu" "'

* baseline versions
forvalues k = 1/7 {
	local name : word `k' of `impt_names'
	local item : word `k' of `impt_items'
	generate impt_`name'_bl = `item'_bl/(`item'_bl!=99)
}

* endline versions
forvalues k = 1/7 {
	local name : word `k' of `impt_names'
	local item : word `k' of `impt_items'
	generate impt_`name' = `item'/(`item'!=99)
}

* z-scores (endline first, then baseline, for each item)
foreach name of local impt_names {
	standardization impt_`name'
	standardization impt_`name'_bl
}

* identical label text for the endline and baseline z-scores
forvalues k = 1/7 {
	local name : word `k' of `impt_names'
	local desc : word `k' of `impt_desc'
	label variable z_impt_`name' "Index: importance to education and career: `desc'"
	label variable z_impt_`name'_bl "Index: importance to education and career: `desc'"
}


// Create relevant variables for analysis: Primary Outcomes 

* Primary outcome: (Pending) Actual educational choice after graduating from higher secondary school

* Primary outcome: Preferences on higher secondary education: students' self-reported ranking of educational choices including Science, Engineering, Medicine, Education, Business, Arts, Technical degrees (TVET), and stop education after high school. 

* ranking of edu choice by interest (f9); ranking of edu choice by decision (f14); 
* f9g/f9g_bl are compared as strings here and f14g/f14g_bl as numbers; all four are destringed further below.
* Endline dummies are set to missing for attrited students (inlist() would otherwise return 0 for them).

* TVET ranked in the top 3 by genuine interest
g i_f9g = inlist(f9g, "1", "2", "3") // this question has no "I don't know" option so no missing data
g i_f9g_bl = inlist(f9g_bl, "1", "2", "3") 
replace i_f9g = . if attrition 

lab var i_f9g "TVET as top 3 choice after grade 12 genuine interest (dummy)"
lab var i_f9g_bl "TVET as top 3 choice after grade 12 genuine interest (dummy)"

* TVET ranked in the top 3 if deciding today
g i_f14g = inlist(f14g, 1, 2, 3) // this question has no "I don't know" option so no missing data
g i_f14g_bl = inlist(f14g_bl, 1, 2, 3) 
replace i_f14g = . if attrition 

lab var i_f14g "TVET as top 3 choice after grade 12 if decide today (dummy)"
lab var i_f14g_bl "TVET as top 3 choice after grade 12 if decide today (dummy)"

* TVET ranked first by genuine interest
g i_1f9g = inlist(f9g, "1") // this question has no "I don't know" option so no missing data
g i_1f9g_bl = inlist(f9g_bl, "1") 
replace i_1f9g = . if attrition 

lab var i_1f9g "TVET as top 1 choice after grade 12 genuine interest (dummy)"
lab var i_1f9g_bl "TVET as top 1 choice after grade 12 genuine interest (dummy)"

* TVET ranked first if deciding today
g i_1f14g = inlist(f14g, 1) // this question has no "I don't know" option so no missing data
g i_1f14g_bl = inlist(f14g_bl, 1) 
replace i_1f14g = . if attrition 

* FIX: these are top-1 dummies; the labels previously said "top 3" (copied from i_f14g)
lab var i_1f14g "TVET as top 1 choice after grade 12 if decide today (dummy)"
lab var i_1f14g_bl "TVET as top 1 choice after grade 12 if decide today (dummy)"

* raw ranking of TVET rank_raw
* reversed as 9 - rank, so a larger value means TVET is ranked higher
destring f9g f14g f9g_bl f14g_bl, replace
g rank_raw1 = 9 - f9g 
g rank_raw2 = 9 - f14g
g rank_raw1_bl = 9 - f9g_bl
g rank_raw2_bl = 9 - f14g_bl

lab var rank_raw1 "Raw ranking of TVET by genuine interest"
lab var rank_raw2 "Raw ranking of TVET if decide today"
lab var rank_raw1_bl "Raw ranking of TVET by genuine interest"
lab var rank_raw2_bl "Raw ranking of TVET if decide today"
 
// Create relevant variables for analysis: Secondary Outcomes 
* Test score in the high school graduation exam "Bhutan Higher Secondary Education Certificate" (BHSEC) examination
* Test scores use standardization_nomissing (the variant for variables in which 99 is not a missing code).

foreach i in score_eng score_dzo {
	standardization_nomissing `i'
}
lab var z_score_eng "BHSEC english score index"
* FIX: the subject is the Dzongkha language ("dzongkhag" means district)
lab var z_score_dzo "BHSEC dzongkha score index"
	
* Level of preparedness for exam 	
	standardization prep_lang
	standardization prep_eng
	standardization prep_all 

	lab var z_prep_lang "z-score: preparedness final dzongkha exam"
	lab var z_prep_eng "z-score: preparedness final english exam"
	
	lab var z_prep_all "z-score: preparedness final exam overall"
	
// Create relevant variables for analysis: Intermediary outcomes
* Index: Attitude to science and technology (zscore).
* Mean of b2a and b2d over the answers that are not 99.
generate attitude_stem_bl = ///
	(b2a_bl*(b2a_bl!=99) + b2d_bl*(b2d_bl!=99)) / ((b2a_bl!=99) + (b2d_bl!=99))
generate attitude_stem = ///
	(b2a*(b2a!=99) + b2d*(b2d!=99)) / ((b2a!=99) + (b2d!=99))

foreach idx in attitude_stem_bl attitude_stem {
	standardization `idx'
}
label variable z_attitude_stem_bl "Attitude to science and technology index"
label variable z_attitude_stem "Attitude to science and technology index"

	* specific component: each item standardized on its own (baseline, then endline)
	foreach item in b2a b2d {
		standardization `item'_bl
		standardization `item'
	}
	label variable z_b2a_bl "Index: science makes lives healthier easier comfortable"
	label variable z_b2a "Index: science makes lives healthier easier comfortable"
	label variable z_b2d_bl "Index: science makes the world better off"
	label variable z_b2d "Index: science makes the world better off"

* Index: Gender bias related to TVET and employment, item d4g (z-score)
* higher value means more biased against females in TVET
* NOTE(review): the original header wrote "-d4g", but d4g enters un-negated here; confirm the item's direction.
* x/(x!=99) turns a 99 response into missing.
generate genderbias_tvet_bl = d4g_bl/(d4g_bl!=99)
generate genderbias_tvet = d4g/(d4g!=99)

foreach idx in genderbias_tvet genderbias_tvet_bl {
	standardization `idx'
}

label variable z_genderbias_tvet "Bias against women in TVET index"
label variable z_genderbias_tvet_bl "Bias against women in TVET index"

* Bias against blue-collar jobs: mean of d4c, d4d, d4e over the answers that are not 99
generate bluecollarbias = ///
	(d4c*(d4c!=99) + d4d*(d4d!=99) + d4e*(d4e!=99)) / ///
	((d4c!=99) + (d4d!=99) + (d4e!=99))
generate bluecollarbias_bl = ///
	(d4c_bl*(d4c_bl!=99) + d4d_bl*(d4d_bl!=99) + d4e_bl*(d4e_bl!=99)) / ///
	((d4c_bl!=99) + (d4d_bl!=99) + (d4e_bl!=99))

foreach idx in bluecollarbias bluecollarbias_bl {
	standardization `idx'
}

label variable z_bluecollarbias "Bias against blue-collar jobs index"
label variable z_bluecollarbias_bl "Bias against blue-collar jobs index"

	* specific components: each item standardized on its own (endline, then baseline)
	foreach item in d4c d4d d4e {
		standardization `item'
		standardization `item'_bl
	}

	label variable z_d4c "Index: blue-collar jobs are poorly perceived in society"
	label variable z_d4c_bl "Index: blue-collar jobs are poorly perceived in society"
	label variable z_d4d "Index: blue-collar jobs are inferior to white-collar jobs"
	label variable z_d4d_bl "Index: blue-collar jobs are inferior to white-collar jobs"
	label variable z_d4e "Index: even with good pay I wouldn’t take blue-collar jobs"
	label variable z_d4e_bl "Index: even with good pay I wouldn't take blue-collar jobs"

* Work perception: item d4a, with a 99 response turned into missing via x/(x!=99)
generate work = d4a/(d4a!=99)
generate work_bl = d4a_bl/(d4a_bl!=99)

foreach idx in work work_bl {
	standardization `idx'
}
label variable z_work "Index: know exactly which occupation to work"
label variable z_work_bl "Index: know exactly which occupation to work"


* Index: Subjective knowledge on TVET path regarding admission, application, curriculum, student life, scholarship, graduation rate and future career path: (zscore, f1-7b)
* Mean of f1b..f7b over the answers that are not 99.
generate subj_tvet_bl = ///
	(f1b_bl*(f1b_bl!=99) + f2b_bl*(f2b_bl!=99) + f3b_bl*(f3b_bl!=99) + f4b_bl*(f4b_bl!=99) + ///
	 f5b_bl*(f5b_bl!=99) + f6b_bl*(f6b_bl!=99) + f7b_bl*(f7b_bl!=99)) / ///
	((f1b_bl!=99) + (f2b_bl!=99) + (f3b_bl!=99) + (f4b_bl!=99) + ///
	 (f5b_bl!=99) + (f6b_bl!=99) + (f7b_bl!=99))

generate subj_tvet = ///
	(f1b*(f1b!=99) + f2b*(f2b!=99) + f3b*(f3b!=99) + f4b*(f4b!=99) + ///
	 f5b*(f5b!=99) + f6b*(f6b!=99) + f7b*(f7b!=99)) / ///
	((f1b!=99) + (f2b!=99) + (f3b!=99) + (f4b!=99) + ///
	 (f5b!=99) + (f6b!=99) + (f7b!=99))

foreach idx in subj_tvet_bl subj_tvet {
	standardization `idx'
}

* these two labels go on the raw (unstandardized) indices
label variable subj_tvet_bl "Subjective knowledge on TVET index"
label variable subj_tvet "Subjective knowledge on TVET index"

	* specific components: standardize each item (endline, then baseline) and label the z-scores
	forvalues q = 1/7 {
		standardization f`q'b
		standardization f`q'b_bl
	}

	local tvet_topics `" "admission" "application" "curriculum" "student life" "tuition aid" "graduation rate" "future career" "'
	forvalues q = 1/7 {
		local topic : word `q' of `tvet_topics'
		label variable z_f`q'b "Subjective knowledge on TVET index: `topic'"
		label variable z_f`q'b_bl "Subjective knowledge on TVET index: `topic'"
	}

	
* Index: Subjective knowledge on university education regarding admission, application, curriculum, student life, scholarship, graduation rate and future career path: (zscore, f1-7a)
* Mean of f1a..f7a over the answers that are not 99.
generate subj_univ_bl = ///
	(f1a_bl*(f1a_bl!=99) + f2a_bl*(f2a_bl!=99) + f3a_bl*(f3a_bl!=99) + f4a_bl*(f4a_bl!=99) + ///
	 f5a_bl*(f5a_bl!=99) + f6a_bl*(f6a_bl!=99) + f7a_bl*(f7a_bl!=99)) / ///
	((f1a_bl!=99) + (f2a_bl!=99) + (f3a_bl!=99) + (f4a_bl!=99) + ///
	 (f5a_bl!=99) + (f6a_bl!=99) + (f7a_bl!=99))

generate subj_univ = ///
	(f1a*(f1a!=99) + f2a*(f2a!=99) + f3a*(f3a!=99) + f4a*(f4a!=99) + ///
	 f5a*(f5a!=99) + f6a*(f6a!=99) + f7a*(f7a!=99)) / ///
	((f1a!=99) + (f2a!=99) + (f3a!=99) + (f4a!=99) + ///
	 (f5a!=99) + (f6a!=99) + (f7a!=99))

foreach idx in subj_univ_bl subj_univ {
	standardization `idx'
}

* these two labels go on the raw (unstandardized) indices
label variable subj_univ_bl "Subjective knowledge on univ index"
label variable subj_univ "Subjective knowledge on univ index"

	* specific components: standardize each item (endline, then baseline) and label the z-scores
	forvalues q = 1/7 {
		standardization f`q'a
		standardization f`q'a_bl
	}

	local univ_topics `" "admission" "application" "curriculum" "student life" "tuition aid" "graduation rate" "future career" "'
	forvalues q = 1/7 {
		local topic : word `q' of `univ_topics'
		label variable z_f`q'a "Subjective knowledge on university index: `topic'"
		label variable z_f`q'a_bl "Subjective knowledge on university index: `topic'"
	}


* Index: Objective knowledge about entry requirements and application process to TTI/IZC (endline only)
* Q 31-33
* Share of the three questions answered with the code treated as correct
* (ttiizc_criteria==2, registration_form==2, interview_criteria==1), computed over
* the non-missing answers; the result is missing when all three are missing (0/0).
g obj_tvet = ((ttiizc_criteria==2) + (registration_form==2) + (interview_criteria==1))/(~missing(ttiizc_criteria) + ~missing(registration_form) + ~missing(interview_criteria))

* only the standardized index is kept
standardization obj_tvet
lab var z_obj_tvet "Objective knowledge on TVET index"
cap drop obj_tvet
	
	* specific component 
	* x/(~missing(item)) leaves the dummy missing when the item itself is missing
	g i_obj_tvet_crt = (ttiizc_criteria==2)/(~missing(ttiizc_criteria))
	g i_obj_tvet_regis = (registration_form==2)/(~missing(registration_form))
	g i_obj_tvet_int = (interview_criteria==1)/(~missing(interview_criteria))
	
	* FIX: spelling of "knowledge" in the labels below
	lab var i_obj_tvet_crt "Objective knowledge: admission eligibility (dummy)"
	lab var i_obj_tvet_regis "Objective knowledge: registration (dummy)"
	lab var i_obj_tvet_int "Objective knowledge: interview criteria (dummy)"
	

* Index: Population beliefs salary (g2), employment rate (g1) of secondary school graduates, university graduates, and TVET graduates
* We will use the misperception measure: gap between baseline/endline and LFS statistics
* Suffix convention used by the error variables further below: a = univ, b = TVET (tti), c = HS; _m = men, _f = women.

* true information: LFS data
* employment rates (g1), in percent; the university figures are the mean of six values
g ans_g1a_m = (92+95+98+100+87+90)/6 // population employment rate with univ degree, men
g ans_g1a_f = (80+93+83+90+71+75)/6 // population employment rate with univ degree, women
g ans_g1b_m = 94 // population employment rate with tti degree, men
g ans_g1b_f = 91 // population employment rate with tti degree, women
g ans_g1c_m = 94 // population employment rate with HS degree, men
g ans_g1c_f = 69 // population employment rate with HS degree, women

* salaries (g2); the error computation below divides these by 1000 before taking logs
g ans_g2a_m = (26100+27500+27300+28300+24900+24600)/6 // population salary with univ degree, men (mean of six values)
g ans_g2a_f = (25900+28400+23500+28500+23200+21400)/6 // population salary with univ degree, women (mean of six values)
g ans_g2b_m = 20800 // population salary with tti degree, men
g ans_g2b_f = 21100 // population salary with tti degree, women
g ans_g2c_m = 18600 // population salary with HS degree, men
g ans_g2c_f = 16900 // population salary with HS degree, women

* Population-belief items g1 (employment rate) and g2 (salary).
* Each item is asked separately about men (_m) and women (_f); i_<item> keeps the
* answer that refers to the respondent's own sex (sex==1 -> _m, sex==0 -> _f).

* Step 1: make every raw item numeric (force: non-numeric entries become missing)
foreach variable in g1a g1b g1c g2a g2b g2c {
	destring `variable'_m_bl `variable'_f_bl `variable'_m `variable'_f, replace force
}

* Step 2: 99 is a missing code for the salary items only.
* Hoisted out of the loop: it is loop-invariant and previously ran in the first
* iterations before the g2 variables had been destringed.
recode g2a* g2b* g2c* (99=.)

* Step 3: own-sex belief, baseline and endline
foreach variable in g1a g1b g1c g2a g2b g2c {
	* cap: the variable may already exist; in that case only sex==1 rows are overwritten
	cap g i_`variable'_bl = `variable'_f_bl if sex==0
	replace i_`variable'_bl = `variable'_m_bl if sex==1
	destring i_`variable'_bl, replace force

	cap g i_`variable' = `variable'_f if sex==0
	replace i_`variable' = `variable'_m if sex==1
	destring i_`variable', replace force
}

* Errors in beliefs about population employment rates (g1a/g1b/g1c):
* own-sex belief minus the sex-specific LFS statistic, signed and absolute,
* for baseline (_bl) and endline.
local degree_names univ TVET HS
local pos = 0
foreach i in a b c {
	local ++pos
	local degree : word `pos' of `degree_names'
	local q g1`i'

	* signed error, baseline
	capture generate error_`q'_bl = i_`q'_bl - ans_`q'_m if sex==1 // male
	capture replace error_`q'_bl = i_`q'_bl - ans_`q'_f if sex==0 // female
	label variable error_`q'_bl "Errors in beliefs about population employment rates with `degree' degree"

	* signed error, endline
	capture generate error_`q' = i_`q' - ans_`q'_m if sex==1 // male
	capture replace error_`q' = i_`q' - ans_`q'_f if sex==0 // female
	label variable error_`q' "Errors in beliefs about population employment rates with `degree' degree"

	* absolute error, baseline
	capture generate abs_error_`q'_bl = abs(i_`q'_bl - ans_`q'_m) if sex==1 // male
	capture replace abs_error_`q'_bl = abs(i_`q'_bl - ans_`q'_f) if sex==0 // female
	label variable abs_error_`q'_bl "Absolute errors in beliefs about population employment rates with `degree' degree"

	* absolute error, endline
	capture generate abs_error_`q' = abs(i_`q' - ans_`q'_m) if sex==1 // male
	capture replace abs_error_`q' = abs(i_`q' - ans_`q'_f) if sex==0 // female
	label variable abs_error_`q' "Absolute errors in beliefs about population employment rates with `degree' degree"
}

* Errors in beliefs about population earnings (g2a/g2b/g2c), in logs:
* log(own-sex belief) - log(LFS salary/1000). The beliefs are recorded in
* Nu 000 (see the i_g2* labels), hence the division of the LFS figure by 1000.
* FIX: female respondents (sex==0) are now compared with the female statistic
* ans_*_f; the male figure ans_*_m was previously used for both sexes.
foreach i in a b c { 
	if "`i'"=="a" {
		local degree = "univ"
	}
	if "`i'"=="b" {
		local degree = "TVET" 
	}
	if "`i'"=="c" {
		local degree = "HS" 
	}
foreach j in 2 {
* signed error, endline
cap g error_g`j'`i' =  log(i_g`j'`i') - log(ans_g`j'`i'_m/1000) if sex==1 // male
cap replace error_g`j'`i' = log(i_g`j'`i') - log(ans_g`j'`i'_f/1000) if sex==0 // female
lab var error_g`j'`i' "Errors in beliefs about population earnings with `degree' degree"

* absolute error, endline
cap g abs_error_g`j'`i' =  abs(log(i_g`j'`i') - log(ans_g`j'`i'_m/1000)) if sex==1 // male
cap replace abs_error_g`j'`i' = abs(log(i_g`j'`i') - log(ans_g`j'`i'_f/1000)) if sex==0 // female
lab var abs_error_g`j'`i' "Absolute errors in beliefs about population earnings with `degree' degree"

* signed error, baseline
cap g error_g`j'`i'_bl =  log(i_g`j'`i'_bl) - log(ans_g`j'`i'_m/1000) if sex==1 // male
cap replace error_g`j'`i'_bl = log(i_g`j'`i'_bl) - log(ans_g`j'`i'_f/1000) if sex==0 // female
lab var error_g`j'`i'_bl "Errors in beliefs about population earnings with `degree' degree"

* absolute error, baseline
cap g abs_error_g`j'`i'_bl = abs(log(i_g`j'`i'_bl) - log(ans_g`j'`i'_m/1000)) if sex==1 // male
cap replace abs_error_g`j'`i'_bl = abs(log(i_g`j'`i'_bl) - log(ans_g`j'`i'_f/1000)) if sex==0 // female
lab var abs_error_g`j'`i'_bl "Absolute errors in beliefs about population earnings with `degree' degree"
}
}

* Labels for the own-sex population beliefs (a = univ, b = tti, c = HS)
local pos = 0
foreach i in a b c {
	local ++pos
	local deg : word `pos' of univ tti HS
	foreach v in i_g1`i' i_g1`i'_bl {
		label variable `v' "Belief about employment rate (%) with `deg' degree"
	}
	foreach v in i_g2`i' i_g2`i'_bl {
		label variable `v' "Belief about salary (Nu 000) with `deg' degree"
	}
}

* Attention check at endline: flagged 1 when the check item equals 2.
* NOTE(review): all other students are left missing rather than 0; confirm this is intended.
generate attentiveness = 1 if ifyouunderstandtheinstructioncle == 2
label variable attentiveness "student understands the questions well (endline)"

* zscore Expected satisfaction on tvet: mean of f12b and f13b over the answers that are not 99
generate satisfied_tvet = ///
	(f12b*(f12b!=99) + f13b*(f13b!=99)) / ((f12b!=99) + (f13b!=99))
standardization satisfied_tvet

generate satisfied_tvet_bl = ///
	(f12b_bl*(f12b_bl!=99) + f13b_bl*(f13b_bl!=99)) / ((f12b_bl!=99) + (f13b_bl!=99))
standardization satisfied_tvet_bl

* components standardized on their own (endline, then baseline)
foreach item in f12b f13b {
	standardization `item'
	standardization `item'_bl
}

label variable z_satisfied_tvet "Expected satisfaction on TVET index"
label variable z_satisfied_tvet_bl "Expected satisfaction on TVET index"

label variable z_f12b "Expected satisfaction on TVET curriculum index"
label variable z_f12b_bl "Expected satisfaction on TVET curriculum index"
label variable z_f13b "Expected satisfaction on TVET student life index"
label variable z_f13b_bl "Expected satisfaction on TVET student life index"

* Index: zscore Beliefs about approval of parents, peers, and society if enrolling in tvet

* make the six items numeric (force: non-numeric entries become missing)
foreach item in g7b g8b g9b g7b_bl g8b_bl g9b_bl {
	destring `item', replace force
}

* these variables do not code 99 as missing, use the same standardization program as test score
capture drop approval*
generate approval_tvet = (g7b + g8b + g9b)/3
generate approval_tvet_bl = (g7b_bl + g8b_bl + g9b_bl)/3

foreach idx in approval_tvet approval_tvet_bl {
	standardization_nomissing `idx'
}

label variable z_approval_tvet "Approval index if choosing TVET"
label variable z_approval_tvet_bl "Approval index if choosing TVET"

	* specific component: g7b = parents, g8b = friends, g9b = society
	local approvers parents friends society
	local pos = 0
	foreach item in g7b g8b g9b {
		local ++pos
		local who : word `pos' of `approvers'
		standardization_nomissing `item'
		standardization_nomissing `item'_bl
		label variable z_`item' "Approval index if choosing TVET: `who'"
		label variable z_`item'_bl "Approval index if choosing TVET: `who'"
	}

	
* zscore Beliefs about peers' preferences on TTI: f16g
* The raw answers are strings; the last character is removed before converting to numbers
* (presumably a trailing unit such as "%" -- confirm against the raw data).
foreach v in f16g_f f16g_m f16g_f_bl f16g_m_bl {
	replace `v' = substr(`v', 1, length(`v') - 1)
	destring `v', replace
}

* own-sex version: sex==0 takes the _f item, sex==1 the _m item, otherwise missing
generate f16g = cond(sex==1, f16g_m, f16g_f) if inlist(sex, 0, 1)
standardization_nomissing f16g
label variable z_f16g "Belief index about peers' preference in TVET"

generate f16g_bl = cond(sex==1, f16g_m_bl, f16g_f_bl) if inlist(sex, 0, 1)
standardization_nomissing f16g_bl
label variable z_f16g_bl "Belief index about peers' preference in TVET"

* Index: Subjective assessment of ability to enroll (f10b) and complete (f11b) tvet
* Mean of the two items over the answers that are not 99.
capture drop ability
generate ability = ///
	(f10b*(f10b!=99) + f11b*(f11b!=99)) / ((f10b!=99) + (f11b!=99))
generate ability_bl = ///
	(f10b_bl*(f10b_bl!=99) + f11b_bl*(f11b_bl!=99)) / ((f10b_bl!=99) + (f11b_bl!=99))

foreach idx in ability ability_bl {
	standardization `idx'
}
label variable z_ability "Subjective assessment to enroll and complete TVET index"
label variable z_ability_bl "Subjective assessment to enroll and complete TVET index"

	* specific component: each item standardized on its own (endline, then baseline)
	foreach item in f10b f11b {
		standardization `item'
		standardization `item'_bl
	}
	label variable z_f10b "Subjective assessment to enroll in TVET index"
	label variable z_f10b_bl "Subjective assessment to enroll in TVET index"
	label variable z_f11b "Subjective assessment to complete TVET index"
	label variable z_f11b_bl "Subjective assessment to complete TVET index"

	
* Beliefs about future employment outcomes (g4, g5)
* 99 does not mean missing for g4*: numeric copies only (non-numeric entries become missing)
foreach item in g4a g4b g4c {
	destring `item', generate(i_`item') force
	destring `item'_bl, generate(i_`item'_bl) force
}

* g5*: copy each item, then treat 99 as missing
foreach item in g5a g5b g5c {
	capture generate i_`item' = `item'
	capture generate i_`item'_bl = `item'_bl
	recode i_`item' i_`item'_bl (99=.)
}

label variable i_g4a "Belief on own employment (\%) with univ degree"
label variable i_g4a_bl "Belief on own employment (\%) with univ degree"
label variable i_g4b "Belief on own employment (\%) with TVET degree"
label variable i_g4b_bl "Belief on own employment (\%) with TVET degree"
label variable i_g4c "Belief on own employment (\%) with high school diploma"
label variable i_g4c_bl "Belief on own employment (\%) with high school diploma"

label variable i_g5a "Belief on own earnings (Nu 000) with univ degree"
label variable i_g5a_bl "Belief on own earnings (Nu 000) with univ degree"
label variable i_g5b "Belief on own earnings (Nu 000) with TVET degree"
label variable i_g5b_bl "Belief on own earnings (Nu 000) with TVET degree"
label variable i_g5c "Belief on own earnings (Nu 000) with high school diploma"
label variable i_g5c_bl "Belief on own earnings (Nu 000) with high school diploma"


* Preference on employment
* Preferences on sectors to work in the future: d2 (having a TVET-related sectors as top 3)
// g i_d2 = inlist(d2c, 1, 2) | inlist(d2e, 1, 2) | inlist(d2g, 1, 2) | inlist(d2h, 1, 2) | inlist(d2i, 1, 2)
// g i_d2_e = inlist(d2c_e, 1, 2) | inlist(d2e_e, 1, 2) | inlist(d2g_e, 1, 2) | inlist(d2h_e, 1, 2) | inlist(d2i_e, 1, 2)
// replace i_d2_e = . if attrition 
// lab var i_d2 "1/0: having TVET-related sectors as top 2 job sector choice (baseline)"
// lab var i_d2_e "1/0: having TVET-related sectors as top 2 job sector choice (endline)"

* One dummy per sector (d2a..d2n): sector ranked 1, 2 or 3; endline = no suffix, baseline = _bl.
foreach i in a b c d e f g h i j k l m n {
	* dropped one at a time: a joint -drop- removes nothing when either variable is absent
	cap drop i_d2`i'
	cap drop i_d2`i'_bl
	cap g i_d2`i' = inlist(d2`i', 1, 2, 3) 
	cap g i_d2`i'_bl = inlist(d2`i'_bl, 1, 2, 3) 
	* FIX: attrited students lack the ENDLINE answer, so the endline dummy is the one
	* set to missing (as for i_f9g / i_f14g); the baseline dummy was blanked before.
	replace i_d2`i' = .  if attrition 
}

lab var i_d2a "Agriculture as top 3 job sector choice (dummy)"
lab var i_d2a_bl "Agriculture as top 3 job sector choice (dummy)"
lab var i_d2b "Mining as top 3 job sector choice (dummy)"
lab var i_d2b_bl "Mining as top 3 job sector choice (dummy)"
lab var i_d2c "Manufacturing as top 3 job sector choice (dummy)"
lab var i_d2c_bl "Manufacturing as top 3 job sector choice (dummy)"
* FIX: "ublic" -> "Public"
lab var i_d2d "Public utilities as top 3 job sector choice (dummy)" 
lab var i_d2d_bl "Public utilities as top 3 job sector choice (dummy)"
lab var i_d2e "Construction as top 3 job sector choice (dummy)" 
lab var i_d2e_bl "Construction as top 3 job sector choice (dummy)"
lab var i_d2f "Trade as top 3 job sector choice (dummy)"
lab var i_d2f_bl "Trade as top 3 job sector choice (dummy)"
* FIX: "Hotal" -> "Hotel"
lab var i_d2g "Hotel and food as top 3 job sector choice (dummy)" 
lab var i_d2g_bl "Hotel and food as top 3 job sector choice (dummy)" 

lab var i_d2h "Transportation as top 3 job sector choice (dummy)"
lab var i_d2h_bl "Transportation as top 3 job sector choice (dummy)"
lab var i_d2i "ICT as top 3 job sector choice (dummy)" 
lab var i_d2i_bl "ICT as top 3 job sector choice (dummy)" 
lab var i_d2j "Finance as top 3 job sector choice (dummy)"
lab var i_d2j_bl "Finance as top 3 job sector choice (dummy)"
lab var i_d2k "Education as top 3 job sector choice (dummy)"
lab var i_d2k_bl "Education as top 3 job sector choice (dummy)"
lab var i_d2l "Health as top 3 job sector choice (dummy)" 
lab var i_d2l_bl "Health as top 3 job sector choice (dummy)"
lab var i_d2m "Entertainment as top 3 job sector choice (dummy)"
lab var i_d2m_bl "Entertainment as top 3 job sector choice (dummy)"
lab var i_d2n "Science as top 3 job sector choice (dummy)" 
lab var i_d2n_bl "Science as top 3 job sector choice (dummy)" 


// Mentor and mentee
* gender combination: categorical code for the (mentor, mentee) gender pair;
* stays missing when either mentor_male or male matches none of the four cases

label define genderindex 1 "male-male" 2 "female-female" 3 "male-female" 4 "female-male"

capture generate genderindex = .
replace genderindex = 1 if mentor_male==1 & male==1
replace genderindex = 2 if mentor_male==0 & male==0
replace genderindex = 3 if mentor_male==1 & male==0
replace genderindex = 4 if mentor_male==0 & male==1

label variable genderindex "mentor-mentee gender pair"
label values genderindex genderindex

// merge with potential share of mentees 
* The helper do-file runs between preserve/restore, so the data in memory are restored
* afterwards. It presumably writes "$clean/sharementee_grade12.dta" (TODO confirm).
preserve 
do "$dofiles/4.0.4.potential_share_mentees_g12.do"
restore 

* school-level merge; keep(matched) drops students whose school is absent from the using file
merge m:1 schoolname_bl using "$clean/sharementee_grade12.dta", nogen keep(matched)
lab var potentialshare "Potential share of mentees in each school"

* intermediate file that is reloaded after the follow-up do-files below
save "$temp/grade12_analysis.dta", replace 


// merge with degree decision data from followup survey, repeat list, and TVET database
* Each do-file below presumably writes one of the "$temp/grade12_analysis_*" files
* merged further down (TODO confirm inside each do-file).

 do "$dofiles/4.2.1.degree_outcome_grade12.do"

// merge with TVET list 

 do "$dofiles/4.2.2.tvet_admission_grade12.do"

// merge with repeating grade data 

 do "$dofiles/4.2.3.repeat_grade12.do"


********************************* merge data together **************************
* Reload the student-level file saved earlier and attach the three follow-up sources by uniqueid.
* No keep() option is given, so unmatched observations from both sides are retained.
* NOTE(review): -force- lets a merge proceed when a variable is string in one file and numeric
* in the other, treating the using values as missing; confirm this is intended.
clear 
use "$temp/grade12_analysis.dta", clear 
merge 1:1 uniqueid using "$temp/grade12_analysis_survey", nogen force
merge 1:1 uniqueid using "$temp/grade12_analysis_tvetadm", nogen force
merge 1:1 uniqueid using "$temp/grade12_analysis_repeat", nogen force

// merge with mentor report dataset 
* Only observations with a non-missing menteeid are merged (1:1 on menteeid, lower-cased
* first); the result is parked in a tempfile. After restore, those same observations are
* dropped from the full data and replaced by the merged versions via append.
* keep(master match) discards mentor-report rows that have no matching student.
preserve 
keep if ~missing(menteeid)
replace menteeid = lower(menteeid)
merge 1:1 menteeid using "$clean/mentor_report", nogen keep(master match)
tempfile data1 
save `data1'
restore 
drop if ~missing(menteeid)
append using `data1'

* Take-up indicators from the mentor report: any recorded meeting count / exactly 4 meetings
generate actualTREAT = ~missing(num_meeting)
generate actualTREAT4 = (num_meeting == 4)

label variable actualTREAT "participated in the program at least 1"
label variable actualTREAT4 "participated in all 4 meetings"

* 1 when at least one of the three follow-up sources has a record for the student, else 0
generate o_finoutcomes = (s_survey_data == 1 | a_adm_data == 1 | r_repeat_data == 1)
label variable o_finoutcomes "final outcome available in either survey/adm/repeat data"

* collapse every answer containing "Yes" to exactly "Yes"
replace s_apptvet = "Yes" if regexm(s_apptvet, "Yes")


// recode all missing outcomes as "0" for administrative data
recode a_app_tvet a_adm_tvet a_adm_tvet_dr a_adm_tvet_drpr a_app_tvet_dr a_app_tvet_drpr ///
	r_repeat_data (.=0)

***** KEEP RELEVANT VARIABLES FOR ANALYSIS

* Dummies for each mentor-mentee gender pair (genderindex codes 1..4, in this order)
local pos = 0
foreach pair in MM FF MF FM {
	local ++pos
	generate TREAT_`pair' = genderindex == `pos'
}

* same-gender pairs are codes 1-2, mixed-gender pairs are codes 3-4
generate TREAT_SAME = inlist(genderindex, 1, 2)
generate TREAT_DIFF = inlist(genderindex, 3, 4)

label variable TREAT_MM "Male Mentor - Male Mentee"
label variable TREAT_FF "Female Mentor - Female Mentee"
label variable TREAT_MF "Male Mentor - Female Mentee"
label variable TREAT_FM "Female Mentor - Male Mentee"
label variable TREAT_SAME "Same Gender Mentor-Mentee"
label variable TREAT_DIFF "Different Genders Mentor-Mentee"

* attrition indicators: attrition1 is the existing variable renamed, attrition2 the second follow-up survey
capture rename attrition attrition1

generate attrition2 = (s_survey_data!=1)
label variable attrition2 "indicator for attrited student in the second follow-up survey"

rename missing_score attrition_test

* likelihood of keeping in contact with the mentor; missing when the question is unanswered
capture generate treat_incontact = inlist(treat_contact, "Maybe", "Likely", "Very likely")
replace treat_incontact = . if missing(treat_contact)

capture generate treat_incontactL = inlist(treat_contact, "Likely", "Very likely")
replace treat_incontactL = . if missing(treat_contact)

* the baseline strata variable replaces any existing strata
capture drop strata
rename strata_bl strata

* The combined ability index is renamed to the *_tvet names that $intermediate9 refers to
rename z_ability z_ability_tvet
rename z_ability_bl z_ability_tvet_bl

* Primary outcomes and values at baseline
* NOTE(review): primary1 omits the endline top-1 dummies i_1f9g / i_1f14g, while their
* baseline versions are in primary1_bl; confirm this is intended.
global primary1 "i_f9g i_f14g rank_raw1 rank_raw2"
global primary1_bl "i_f9g_bl i_f14g_bl i_1f9g_bl i_1f14g_bl rank_raw1_bl rank_raw2_bl"

global primary2 "a_app_tvet a_adm_tvet s_app_tvet s_adm_tvet"
label variable a_app_tvet "Administrative: Application to TVET (dummy)"
label variable a_adm_tvet "Administrative: Admission to TVET (dummy)"
label variable s_app_tvet "Survey: Application to TVET (dummy)"
label variable s_adm_tvet "Survey: Admission to TVET (dummy)"

* Secondary outcomes
global secondary "z_prep_lang z_prep_eng z_prep_all z_score_eng z_score_dzo passing_bhsec"

* Intermediate outcomes and values at baseline
global intermediate1 "z_impt_income z_impt_prestige z_impt_interest z_impt_parents z_impt_friends z_impt_balance z_impt_cost"
global intermediate1_bl "z_impt_income_bl z_impt_prestige_bl z_impt_interest_bl z_impt_parents_bl z_impt_friends_bl z_impt_balance_bl z_impt_cost_bl"

global intermediate2 "z_attitude_stem z_b2a z_b2d"
global intermediate2_bl "attitude_stem_bl z_attitude_stem_bl z_b2a_bl z_b2d_bl"

global intermediate3 "z_genderbias_tvet z_bluecollarbias z_d4c z_d4d z_d4e"
global intermediate3_bl "genderbias_tvet_bl z_genderbias_tvet_bl bluecollarbias_bl z_bluecollarbias_bl z_d4c_bl z_d4d_bl z_d4e_bl"

global intermediate4 "z_subj_tvet z_f1b z_f2b z_f3b z_f4b z_f5b z_f6b z_f7b"
global intermediate4_bl "subj_tvet_bl z_subj_tvet_bl z_f1b_bl z_f2b_bl z_f3b_bl z_f4b_bl z_f5b_bl z_f6b_bl z_f7b_bl"

global intermediate5 "z_obj_tvet i_obj_tvet_crt i_obj_tvet_regis i_obj_tvet_int"

global intermediate6 "z_satisfied_tvet z_f12b z_f13b"
global intermediate6_bl "z_satisfied_tvet_bl z_f12b_bl z_f13b_bl"

global intermediate7 "z_approval_tvet z_g7b z_g8b z_g9b"
global intermediate7_bl "approval_tvet_bl z_approval_tvet_bl z_g7b_bl z_g8b_bl z_g9b_bl"

global intermediate8 "z_f16g i_g4b  i_g5b "
global intermediate8_bl "z_f16g_bl i_g4b_bl i_g5b_bl"

global intermediate9 "z_ability_tvet z_f10b z_f11b"
global intermediate9_bl "z_ability_tvet_bl z_f10b_bl z_f11b_bl"

global intermediate10 "abs_error_g1b abs_error_g2b"
global intermediate10_bl "error_g1a_bl error_g1b_bl error_g1c_bl error_g2a_bl error_g2b_bl error_g2c_bl abs_error_g1b_bl abs_error_g2b_bl"

* Control variables
global students_ses "male age_bl father_edu mother_edu z_wealth_bl z_big5extro_bl z_big5agree_bl z_big5consci_bl z_big5neuro_bl z_big5open_bl"
global students_aca "z_perf_math_bl z_perf_lang_bl z_perf_eng_bl z_perf_all_bl"
global mentors "zm_b5extro zm_b5agree zm_b5consci zm_b5neuro zm_b5open zm_genderbias"
global others "attrition1 attrition2 potentialshare mt_* attrition_test attentiveness* tvetcourse_bl f15_bl f15"
global feedback "treat_satisfaction_mentor treat_satisfaction_program treat_incontact treat_incontactL"
global meeting "num_meeting meeting_minutes1 meeting_minutes2 meeting_minutes3 meeting_minutes4 qual_meeting num_online grade interval communication communication_m"

* Programme feedback is blanked for every observation with TREAT != 1
foreach fbvar of global feedback {
	replace `fbvar' = . if TREAT!=1
}

* Keep identifiers and treatment variables plus everything listed in the globals defined above
local keepvars uniqueid strata school TREAT* actualTREAT* SPILL CONTROL
foreach block in primary1 primary1_bl primary2 secondary ///
	intermediate1 intermediate1_bl intermediate2 intermediate2_bl ///
	intermediate3 intermediate3_bl intermediate4 intermediate4_bl ///
	intermediate5 intermediate6 intermediate6_bl ///
	intermediate7 intermediate7_bl intermediate8 intermediate8_bl ///
	intermediate9 intermediate9_bl intermediate10 intermediate10_bl ///
	students_ses students_aca mentors others feedback meeting {
	local keepvars `keepvars' ${`block'}
}
keep `keepvars'

* Final variable labels for the analysis file
label variable age_bl "Age"
label variable male "Male"
label variable father_edu "Father with high school diploma (dummy)"
label variable mother_edu "Mother with high school diploma (dummy)"
label variable z_wealth_bl "Wealth index (PCA)"
label variable potentialshare "Potential share of mentees"
label variable z_subj_tvet_bl "Subjective knowledge on TVET index"
label variable z_subj_tvet "Subjective knowledge on TVET index"
label variable approval_tvet_bl "Beliefs about approval of parents, friends and society (percent)"
label variable genderbias_tvet_bl "Bias against women in STEM (5-point Likert)"
label variable bluecollarbias_bl "Bias against blue-collar jobs (5-point Likert)"
label variable attitude_stem_bl "Attitude toward science and technology (5-point Likert)"
label variable mt_parents "Discuss with parents about career choice (dummy)"
label variable mt_siblings "Discuss with siblings about career choice (dummy)"
label variable mt_friends "Discuss with friends about career choice (dummy)"
label variable mt_teachers "Discuss with teachers about career choice (dummy)"
label variable treat_incontact "Maybe, likely or very likely to keep in touch with mentors"
label variable treat_incontactL "Likely or very likely to keep in touch with mentors"

* value labels for the gender dummy
label define male 0 "Female" 1 "Male", modify
label values male male

* grade is part of $meeting and therefore survives the keep; it is removed here
drop grade

* final output of this section
save "$clean/grade12_analysis", replace








********************************************************************************
********************************************************************************
******************* BASELINE CHARACTERISTICS OF THE WHOLE SAMPLE ***************
********************************************************************************
********************************************************************************
* Build baseline comparison data sets for the whole sample.
* For each cleaned baseline file listed below, the loop:
*   1. loads "$clean/<filename>.dta" and suffixes every variable with _bl,
*   2. constructs the socioeconomic, personality, belief and performance
*      variables defined in the body,
*   3. keeps only the variables in the final keep list, and
*   4. saves the result as "$clean/comparison_<filename>.dta".
* Throughout, 99 is treated as a "missing" code unless stated otherwise.
foreach filename in grade1112_baseline_reachable_wgrade all_grade1112_baseline {

	use "$clean/`filename'.dta", clear

	* Every raw variable gets the _bl suffix
	rename * *_bl

	* Socioeconomic status: age, gender, parents having at least high school education, wealth
	g age_bl = 2021 - b_year_bl

	* Leave male missing when sex is missing: a bare (sex_bl==1) would code
	* missing sex as 0, i.e. "Female".
	g male = sex_bl==1 if !missing(sex_bl)
	lab var age_bl "Age"
	lab var male "Male"

	* Stata treats missing as larger than any number, so (x>=6) is TRUE for a
	* missing x. Exclude both the 99 code and true missings so that parents
	* with unknown education are not counted as holding a diploma.
	g father_edu = x3_bl>=6 if !missing(x3_bl) & x3_bl~=99
	g mother_edu = x7_bl>=6 if !missing(x7_bl) & x7_bl~=99

	lab var father_edu "Father with high school diploma (dummy)"
	lab var mother_edu "Mother with high school diploma (dummy)"

	* Wealth: score on the first principal component of the x11* yes/no items
	recode x11*_bl (2=0) // 2 means no, 1 means yes
	recode x11*_bl (99=.)
	pca x11*_bl
	predict pc1, score
	rename pc1 wealth_bl

	* ranking of edu choice by interest (f9); ranking of edu choice by decision (f14);
	* f9g is compared as a string, f14g as a number
	g i_f9g_bl = inlist(f9g_bl, "1", "2", "3")
	lab var i_f9g_bl "TVET as top 3 choice after grade 12 genuine interest (dummy)"

	g i_f14g_bl = inlist(f14g_bl, 1, 2, 3)
	lab var i_f14g_bl "TVET as top 3 choice after grade 12 if decide today (dummy)"

	* Personality
	* Each trait is the mean of its three items over the items not coded 99;
	* items entered as (5 - item) are reverse-scored. A system-missing item
	* makes the whole index missing.
	g big5extro_bl = ((5 - p1a_bl)*(p1a_bl~=99) + p1c_bl*(p1c_bl~=99) + p1i_bl*(p1i_bl~=99))/((p1a_bl~=99) + (p1c_bl~=99) + (p1i_bl~=99))
	g big5agree_bl = ((5 - p1l_bl)*(p1l_bl~=99) + p1m_bl*(p1m_bl~=99) + p1n_bl*(p1n_bl~=99))/((p1l_bl~=99) + (p1m_bl~=99) + (p1n_bl~=99))
	g big5consci_bl = (p1d_bl*(p1d_bl~=99) + (5 - p1b_bl)*(p1b_bl~=99) + p1o_bl*(p1o_bl~=99))/((p1d_bl~=99) + (p1b_bl~=99) + (p1o_bl~=99))
	g big5neuro_bl = (p1e_bl*(p1e_bl~=99) + p1g_bl*(p1g_bl~=99) + (5 - p1h_bl)*(p1h_bl~=99))/((p1e_bl~=99) + (p1g_bl~=99) + (p1h_bl~=99))
	g big5open_bl = (p1j_bl*(p1j_bl~=99) + p1k_bl*(p1k_bl~=99) + p1f_bl*(p1f_bl~=99))/((p1j_bl~=99) + (p1k_bl~=99) + (p1f_bl~=99))
	lab var big5extro_bl "Big 5 extroversion"
	lab var big5agree_bl "Big 5 agreeness"
	lab var big5consci_bl "Big 5 conscientiousness"
	lab var big5neuro_bl "Big 5 neuroticism"
	lab var big5open_bl "Big 5 openness"

	// Create relevant variables for analysis: Intermediary outcomes
	* Attitude to science and technology: mean of b2a and b2d (raw scale, not standardized here)
	g attitude_stem_bl = (b2a_bl*(b2a_bl~=99) + b2d_bl*(b2d_bl~=99))/((b2a_bl~=99) + (b2d_bl~=99))
	lab var attitude_stem_bl "Attitude to science and technology"

	* Gender bias related to TVET and employment: d4g (raw scale, not standardized here)
	* higher value means more biased against females in TVET
	g genderbias_tvet_bl = d4g_bl if d4g_bl~=99
	lab var genderbias_tvet_bl "Bias against women in TVET"

	* Bias against blue-collar jobs: mean of d4c, d4d, d4e
	g bluecollarbias_bl = (d4c_bl*(d4c_bl~=99) + d4d_bl*(d4d_bl~=99) + d4e_bl*(d4e_bl~=99))/((d4c_bl~=99) + (d4d_bl~=99) + (d4e_bl~=99))
	lab var bluecollarbias_bl "Bias against blue-collar jobs"

	* Subjective knowledge on TVET path regarding admission, application, curriculum, student life,
	* scholarship, graduation rate and future career path: mean of f1b-f7b (raw scale, not standardized here)
	g subj_tvet_bl = (f1b_bl*(f1b_bl~=99) + f2b_bl*(f2b_bl~=99) + f3b_bl*(f3b_bl~=99) + f4b_bl*(f4b_bl~=99) + f5b_bl*(f5b_bl~=99) + f6b_bl*(f6b_bl~=99) + f7b_bl*(f7b_bl~=99))/((f1b_bl~=99) + (f2b_bl~=99) + (f3b_bl~=99) + (f4b_bl~=99) + (f5b_bl~=99) + (f6b_bl~=99) + (f7b_bl~=99))

	lab var subj_tvet_bl "Subjective knowledge on TVET"

	g satisfied_tvet_bl = (f12b_bl*(f12b_bl~=99) + f13b_bl*(f13b_bl~=99))/((f12b_bl~=99) + (f13b_bl~=99))
	lab var satisfied_tvet_bl "Expected satisfaction on TVET index"

	* g7b-g9b are converted to numeric; force turns non-numeric entries into missing
	foreach var in g7b_bl g8b_bl g9b_bl {
		destring `var', replace force
	}

	* Simple average of the three approval items (missing if any item is missing)
	g approval_tvet_bl = (g7b_bl + g8b_bl+ g9b_bl)/3
	lab var approval_tvet_bl "Approval index if choosing TVET"

	* Drop the last character of each f16g string before converting to numeric
	* (presumably a trailing unit symbol such as "%" -- TODO confirm against raw data)
	foreach i in f m {
		replace f16g_`i'_bl=substr(f16g_`i'_bl, 1, length(f16g_`i'_bl) - 1)
		destring f16g_`i'_bl, replace
	}

	* Use the female-peer item for sex_bl==0 and the male-peer item for sex_bl==1.
	* NOTE(review): male is defined above as sex_bl==1; the female branch assumes
	* females are coded 0 in sex_bl -- confirm the coding (f16g_bl is missing otherwise).
	g f16g_bl = f16g_f_bl if sex_bl==0
	replace f16g_bl = f16g_m_bl if sex_bl==1
	lab var f16g_bl "Belief index about peers' preference in TVET"

	g ability_tvet_bl = (f10b_bl*(f10b_bl~=99) + f11b_bl*(f11b_bl~=99))/((f10b_bl~=99) + (f11b_bl~=99))
	lab var ability_tvet_bl "Subjective assessment to enroll and complete TVET"

	* Beliefs about future employment outcomes (g4, g5)
	* 99 does not mean missing for g4*
	foreach variable in g4a g4b g4c {
		destring `variable'_bl, g(i_`variable'_bl) force
	}

	* For g5*, 99 is a missing code. The capture on generate is kept from the
	* original code; it suppresses any error from the generate step.
	foreach variable in g5a g5b g5c {
		cap g i_`variable'_bl = `variable'_bl
		recode i_`variable'_bl (99=.)
	}

	lab variable i_g4a_bl "Belief on own employment (\%) with univ degree"
	lab variable i_g4b_bl "Belief on own employment (\%) with TVET degree"
	lab variable i_g4c_bl "Belief on own employment (\%) with high school diploma"

	lab variable i_g5a_bl "Belief on own earnings (Nu 000) with univ degree"
	lab variable i_g5b_bl "Belief on own earnings (Nu 000) with TVET degree"
	lab variable i_g5c_bl "Belief on own earnings (Nu 000) with high school diploma"

	* Subjective performance: copy the item, leaving it missing when coded 99
	g perf_math_bl = c1a_bl if c1a_bl~=99
	g perf_lang_bl = c1b_bl if c1b_bl~=99
	g perf_eng_bl = c1c_bl if c1c_bl~=99
	g perf_all_bl = c1k_bl if c1k_bl~=99

	lab var perf_math_bl "Subjective performance: math"
	lab var perf_lang_bl "Subjective performance: dzongkha"
	lab var perf_eng_bl "Subjective performance: english"
	lab var perf_all_bl "Subjective performance: overall"

	* Keep only the variables used in the comparison
	keep i_f9g_bl i_f14g_bl attitude_stem_bl genderbias_tvet_bl bluecollarbias_bl subj_tvet_bl satisfied_tvet_bl approval_tvet_bl f16g_bl ability_tvet_bl i_g4b_bl i_g5b_bl age_bl male father_edu mother_edu perf_eng_bl perf_lang_bl big5extro_bl big5agree_bl big5consci_bl big5neuro_bl big5open_bl

	save "$clean/comparison_`filename'.dta", replace
}
	